# 19BCE1353, Sahil Sachin Donde
# CSE3505 Foundations of Data Analytics - Project (J-Component)
# Final (Review 3)
# Faculty: - Dr. TULASI PRASAD SARIKI
# F1 Slot

# Individual Project

# Title: -Statistical Data Analysis of the Stock Price using Data Visualization

#start

# Time Series Plotting
library(ggplot2)
library(xts)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(dygraphs)

# library() stops with an error when quantmod is not installed; require()
# would only return FALSE and let the script fail later at getSymbols().
library(quantmod)
## Loading required package: quantmod
## Loading required package: TTR
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
# Tickers to download from Yahoo Finance.
stock_list <- c("TM")

# The window ends today and starts 5397 days earlier, so the exact range
# (and the recorded output shown in the comments) depends on the run date.
start_date <- Sys.Date() - 5397
end_date <- Sys.Date()
start_date
## [1] "2007-03-04"

# Build one tidy data frame per ticker in a preallocated list and bind them
# once at the end, rather than growing master_df with rbind() on every pass.
stock_frames <- vector("list", length(stock_list))
for (idx in seq_along(stock_list)) {  # seq_along() is safe for an empty list
  stock_index <- stock_list[idx]
  # getSymbols() assigns an object named after the ticker (auto.assign, see
  # the message below); get() then retrieves it by that name.
  getSymbols(stock_index, verbose = TRUE, src = "yahoo",
             from = start_date, to = end_date)
  temp_df <- as.data.frame(get(stock_index))
  # The dates arrive as row names; move them into a regular column.
  temp_df$Date <- row.names(temp_df)
  temp_df$Index <- stock_index
  row.names(temp_df) <- NULL
  # Positional rename: six price/volume columns followed by the two added
  # above, so that every ticker shares the same generic column names.
  colnames(temp_df) <- c("Open", "High", "Low", "Close",
                         "Volume", "Adjusted", "Date", "Index")
  temp_df <- temp_df[c("Date", "Index", "Open", "High",
                       "Low", "Close", "Volume", "Adjusted")]
  stock_frames[[idx]] <- temp_df
}
# With no tickers this yields NULL, the same value the original started from.
master_df <- do.call(rbind, stock_frames)
## 'getSymbols' currently uses auto.assign=TRUE by default, but will
## use auto.assign=FALSE in 0.5-0. You will still be able to use
## 'loadSymbols' to automatically load data. getOption("getSymbols.env")
## and getOption("getSymbols.auto.assign") will still be checked for
## alternate defaults.
## 
## This message is shown once per session and may be disabled by setting 
## options("getSymbols.warning4.0"=FALSE). See ?getSymbols for details.
## downloading  TM .....
## 
## done.
dim(master_df)
## [1] 3722    8
head(master_df, 20)
##          Date Index   Open   High    Low  Close Volume Adjusted
## 1  2007-03-05    TM 128.30 129.49 127.78 127.91 660400 101.9207
## 2  2007-03-06    TM 132.45 132.88 131.38 132.73 926800 105.7614
## 3  2007-03-07    TM 133.24 133.78 132.72 132.92 518300 105.9128
## 4  2007-03-08    TM 134.98 135.63 134.00 134.44 658300 107.1240
## 5  2007-03-09    TM 134.30 134.70 132.89 133.14 465900 106.0881
## 6  2007-03-12    TM 133.14 133.72 132.20 133.40 583900 106.2953
## 7  2007-03-13    TM 133.40 133.40 130.50 130.75 511100 104.1837
## 8  2007-03-14    TM 130.00 130.70 127.82 129.98 842400 103.5701
## 9  2007-03-15    TM 130.48 131.06 130.11 130.74 419600 104.1757
## 10 2007-03-16    TM 130.03 131.13 129.64 129.90 342100 103.5064
## 11 2007-03-19    TM 130.46 131.63 130.43 131.55 314500 104.8212
## 12 2007-03-20    TM 131.45 132.50 131.45 132.05 327400 105.2196
## 13 2007-03-21    TM 131.98 133.53 131.02 133.53 707200 106.3989
## 14 2007-03-22    TM 132.76 133.46 132.03 132.80 537600 105.8172
## 15 2007-03-23    TM 133.60 133.67 132.87 133.18 538200 106.1200
## 16 2007-03-26    TM 132.75 132.78 131.45 132.70 244800 105.7375
## 17 2007-03-27    TM 131.50 131.50 130.52 131.26 301000 104.5901
## 18 2007-03-28    TM 130.56 130.56 128.63 128.86 875300 103.5073
## 19 2007-03-29    TM 129.78 130.18 128.74 129.95 435800 104.3828
## 20 2007-03-30    TM 128.50 129.19 127.91 128.16 565700 102.9450
# Column-wise summary of the downloaded data.
summary(master_df)
##      Date              Index                Open             High       
##  Length:3722        Length:3722        Min.   : 57.39   Min.   : 58.38  
##  Class :character   Class :character   1st Qu.: 83.81   1st Qu.: 84.21  
##  Mode  :character   Mode  :character   Median :115.39   Median :116.06  
##                                        Mean   :110.57   Mean   :111.17  
##                                        3rd Qu.:127.07   3rd Qu.:127.73  
##                                        Max.   :187.00   Max.   :188.18  
##       Low             Close            Volume            Adjusted     
##  Min.   : 55.41   Min.   : 57.68   Min.   :   48400   Min.   : 48.15  
##  1st Qu.: 83.32   1st Qu.: 83.86   1st Qu.:  199625   1st Qu.: 72.51  
##  Median :114.78   Median :115.31   Median :  336350   Median :107.94  
##  Mean   :109.87   Mean   :110.55   Mean   :  470420   Mean   :103.52  
##  3rd Qu.:126.35   3rd Qu.:127.17   3rd Qu.:  567675   3rd Qu.:123.79  
##  Max.   :186.90   Max.   :187.44   Max.   :18582700   Max.   :187.44

# Draw one horizontal box plot; the four calls below differ only in the
# column plotted, the labels and the colours.
draw_outlier_boxplot <- function(prices, title, axis_label, outline, fill) {
  invisible(boxplot(prices, main = title, xlab = axis_label,
                    border = outline, col = fill, horizontal = TRUE))
}

draw_outlier_boxplot(master_df[["Close"]],
                     "Box plot for detecting outliers in Closing Price",
                     "Closing Price", "brown", "gold")

draw_outlier_boxplot(master_df[["High"]],
                     "Box plot for detecting outliers in Highest Price",
                     "Highest Price", "purple", "pink")

draw_outlier_boxplot(master_df[["Low"]],
                     "Box plot for detecting outliers in Lowest Price",
                     "Lowest Price", "black", "orange")

draw_outlier_boxplot(master_df[["Open"]],
                     "Box plot for detecting outliers in Opening Price",
                     "Opening Price", "brown", "cyan")

# Closing prices as a daily series. master_df$Date holds date strings, so it
# is parsed with as.Date(): as.POSIXct() on a string stamps each day at
# midnight in the machine's local time zone, which makes the index (and the
# dates shown on the chart) depend on where the script is run.
Close_xts <- xts(x = master_df$Close, order.by = as.Date(master_df$Date))

# Make the chart
# NOTE(review): the title says "Tata Motors" but the data is for ticker "TM";
# confirm that the company name matches the ticker.
# "V1" is presumably the default name given to the single unnamed column.
p <- dygraph(Close_xts, ylab = "Close",
             main = "Tata Motors Closing Stock Prices") %>%
  dySeries("V1", label = "Closing Price") %>%
  dyOptions(colors = c("blue")) %>%
  dyRangeSelector()
p
# Fetch TM again, this time without from/to arguments; the call assigns the
# object `TM` and prints the symbol name.
getSymbols("TM", src = "yahoo")
## [1] "TM"
#print(TM$TM.Open)
# Expose the index as a Date column and keep only the 30 most recent rows.
df <- tail(data.frame(Date = index(TM), coredata(TM)), n = 30)
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Candlestick chart of the rows in df, built and titled in one pipeline.
fig <- plot_ly(df, x = ~Date, type = "candlestick",
               open = ~TM.Open, close = ~TM.Close,
               high = ~TM.High, low = ~TM.Low) %>%
  layout(title = "Basic Candlestick Chart")

fig
#end

# start


# Load the prepared indicator sheet (replaces the xts object named TM).
# NOTE(review): absolute, machine-specific path; the script only runs where
# this file exists.
TM <- read.csv('C:/Users/sahil/Documents/Sahil/VIT Chennai/3rd Year/5th Semester/Projects/CSE3505 FDA/Review 2/TM.csv', header = TRUE)
dim(TM)
## [1] 3721   13
# Drop rows whose RSI cell is the placeholder "#N/A". The is.na() guard stops
# a missing RSI from turning into an all-NA row during logical subsetting.
TM <- TM[!is.na(TM$RSI) & TM$RSI != "#N/A", ]
# Because of the placeholder the column is read as text; convert it so RSI
# can be used as a number. Anything still unparseable becomes NA and is
# removed by na.omit() below.
TM$RSI <- as.numeric(as.character(TM$RSI))

print(master_df$Date[1])
## [1] "2007-03-05"
strDates <- TM$Date
#print(TM$Date)
typeof(strDates)
## [1] "character"
# The sheet stores dates as day-month-year.
TM$Date <- as.Date(strDates, "%d-%m-%Y")
#print(TM$Date)
# Remove every row that has a missing value in any column.
TM <- na.omit(TM)
dim(TM)
## [1] 3544   13
# One xts series per quantity, all ordered by the trading date.
trade_time <- as.POSIXct(TM$Date)
Close <- xts(x = TM$Close, order.by = trade_time)
# RSI comes from a CSV column that contained "#N/A" cells, so it may still be
# text here; coerce it so the merged object stays numeric.
RSI <- xts(x = as.numeric(as.character(TM$RSI)), order.by = trade_time)
# Volume is divided by 100000, presumably to bring it onto a scale comparable
# with the other two series (TODO confirm).
Vol <- xts(x = TM$Volume / 100000, order.by = trade_time)

# The merged columns are addressed below as "Close", "RSI" and "Vol", so the
# variable names passed to cbind() must stay as they are.
st <- cbind(Close, RSI, Vol)

# Make the chart
p <- dygraph(st, ylab = "Close",
             main = "Tata Motors Closing Stock Prices") %>%
  dySeries("Close", label = "Close") %>%
  dySeries("RSI", label = "RSI") %>%
  dySeries("Vol", label = "Vol") %>%
  dyOptions(colors = c("green", "blue", "brown")) %>%
  dyRangeSelector()
p
# 5-day simple moving average of the closing price
TM[["MA5"]] <- TTR::SMA(TM[["Close"]], n = 5)

# 50-day simple moving average of the closing price
TM[["MA50"]] <- TTR::SMA(TM[["Close"]], n = 50)

head(TM, n = 20)
##    Sr       Date month year Index   Open   High    Low  Close  Volume Adjusted
## 1   1 2007-01-03   Jan 2007    TM 135.25 136.54 134.45 135.30  758600 107.8092
## 2   2 2007-01-04   Jan 2007    TM 136.65 137.97 135.64 137.77  842700 109.7773
## 3   3 2007-01-05   Jan 2007    TM 133.30 133.87 132.55 133.72 1068400 106.5502
## 4   4 2007-01-08   Jan 2007    TM 134.60 134.74 133.80 133.97  511600 106.7495
## 5   5 2007-01-09   Jan 2007    TM 132.17 132.94 131.24 132.16  645700 105.3072
## 6   6 2007-01-10   Jan 2007    TM 129.00 129.86 128.21 129.43  726200 103.1319
## 7   7 2007-01-11   Jan 2007    TM 128.00 130.43 127.81 128.78  803900 102.6140
## 8   8 2007-01-12   Jan 2007    TM 129.87 130.89 129.79 130.89  498500 104.2952
## 9   9 2007-01-16   Jan 2007    TM 131.49 132.49 130.75 131.21  641200 104.5503
## 10 10 2007-01-17   Jan 2007    TM 131.64 131.89 130.78 131.10  641100 104.4626
## 11 11 2007-01-18   Jan 2007    TM 132.20 132.95 131.43 132.08  554200 105.2435
## 12 12 2007-01-19   Jan 2007    TM 131.94 132.50 131.74 132.33  396500 105.4427
## 13 13 2007-01-22   Jan 2007    TM 132.27 132.27 130.42 131.21  524800 104.5503
## 14 14 2007-01-23   Jan 2007    TM 132.10 132.75 131.40 132.08  386000 105.2435
## 15 15 2007-01-24   Jan 2007    TM 134.48 135.60 133.98 135.55  653200 108.0084
## 16 16 2007-01-25   Jan 2007    TM 132.80 133.59 131.20 131.70  794100 104.9407
## 18 18 2007-01-29   Jan 2007    TM 131.85 132.50 131.42 131.70  450300 104.9407
## 20 20 2007-01-31   Jan 2007    TM 130.90 132.27 130.50 131.78  546600 105.0044
## 21 21 2007-02-01   Feb 2007    TM 133.00 133.57 132.55 133.28  376100 106.1996
## 22 22 2007-02-02   Feb 2007    TM 131.65 131.67 130.69 131.27  466700 104.5980
##      RSI   pSAR     MA5 MA50
## 1  62.57 164.77      NA   NA
## 2  58.08 168.09      NA   NA
## 3   57.4 172.23      NA   NA
## 4  51.41 175.79      NA   NA
## 5  48.99 178.86 134.584   NA
## 6  42.93 197.68 133.410   NA
## 7  52.45 197.31 131.612   NA
## 8  60.88 196.94 131.046   NA
## 9   62.7 196.58 130.494   NA
## 10 62.22 196.23 130.282   NA
## 11 64.58 195.89 130.812   NA
## 12 62.15 195.55 131.522   NA
## 13 63.13 195.22 131.586   NA
## 14 58.36 195.00 131.760   NA
## 15 60.46 194.68 132.650   NA
## 16 65.08 194.37 132.574   NA
## 18 61.95 193.76 132.448   NA
## 20 56.67 192.95 132.562   NA
## 21  61.3 192.18 132.802   NA
## 22 64.75 191.44 131.946   NA
# Plot the closing price with both moving averages in ggplot. Each line maps
# a constant string to colour so that it gets its own legend entry.
pl <- ggplot(TM, aes(x = Date)) +
  geom_line(aes(y = Close, color = "Close"), group = 1) +
  geom_line(aes(y = MA5, color = "MA5"), group = 1) +
  # Legend key corrected from "MA10": this line is the 50-day average.
  geom_line(aes(y = MA50, color = "MA50"), group = 1) +
  theme_minimal() +
  theme(legend.position = "top") +
  labs(title = "Moving averages", color = "Prices")
pl
## Warning: Removed 4 row(s) containing missing values (geom_path).
## Warning: Removed 49 row(s) containing missing values (geom_path).

# Closing price and both moving averages as xts series on a shared index.
row_times <- as.POSIXct(TM$Date)
Close <- xts(x = TM$Close, order.by = row_times)
MA5 <- xts(x = TM$MA5, order.by = row_times)
MA50 <- xts(x = TM$MA50, order.by = row_times)

st <- cbind(Close, MA5, MA50)

# Make the chart, one step at a time
p <- dygraph(st, ylab = "Close", main = "Tata Motors Closing Stock Prices")
p <- dySeries(p, "Close", label = "Close")
p <- dySeries(p, "MA5", label = "MA5")
p <- dySeries(p, "MA50", label = "MA50")
p <- dyOptions(p, colors = c("green", "blue", "brown"))
p <- dyRangeSelector(p)
p
#end

# Review - 2 end

# Review -3 Start

# Review 3 chart: closing price together with the RSI and pSAR columns.
row_times <- as.POSIXct(TM$Date)
Close <- xts(x = TM$Close, order.by = row_times)
# RSI comes from a CSV column that contained "#N/A" cells, so it may still be
# text here; coerce it so the merged object stays numeric.
RSI <- xts(x = as.numeric(as.character(TM$RSI)), order.by = row_times)
# Use the pSAR column itself. This series was previously built from TM$MA50,
# so the line labelled "pSAR" was really the 50-day moving average.
# NOTE(review): if that substitution was intentional, relabel the series as
# MA50 instead of restoring the column.
pSAR <- xts(x = TM$pSAR, order.by = row_times)

st <- cbind(Close, RSI, pSAR)

# Make the chart
p <- dygraph(st, ylab = "Close",
             main = "Tata Motors Closing Stock Prices") %>%
  dySeries("Close", label = "Close") %>%
  dySeries("RSI", label = "RSI") %>%
  dySeries("pSAR", label = "pSAR") %>%
  dyOptions(colors = c("green", "blue", "brown")) %>%
  dyRangeSelector()
p
# The assignment `TM$pSAR = TM$MA50` that used to follow was removed: it
# overwrote the indicator column with the moving average.

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:xts':
## 
##     first, last
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(prophet)
## Loading required package: Rcpp
## Loading required package: rlang
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(ggplot2)

# Scatter plot of the closing price against the row counter Sr.
# ggplot() + geom_point() replaces qplot(), which ggplot2 has deprecated.
ggplot(TM, aes(x = Sr, y = Close)) +
  geom_point()

# Model input: the date goes in column `ds` and the value to forecast in `y`.
ds <- TM$Date
y <- TM$Close
df <- data.frame(ds, y)

# Forecasting
m <- prophet(df)
## Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.
# Prediction: extend the date frame by 210 periods and predict over it
future <- make_future_dataframe(m, periods = 210)
forecast <- predict(m, future)

# Plot forecast
plot(m, forecast)

dyplot.prophet(m, forecast)
## Warning: `select_()` was deprecated in dplyr 0.7.0.
## Please use `select()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
# The forecast is then drawn with dyplot.prophet(), which produces an interactive chart that lets you zoom in on finer details.

prophet_plot_components(m, forecast)

# Model performance
# Take the first 210 predicted values.
# NOTE(review): these are the first rows of `forecast`, which presumably
# cover the start of the training history rather than the 210 future
# periods; confirm this is the intended comparison window.
pred <- forecast$yhat[seq_len(210)]
print(pred)
##   [1] 136.9572 137.0635 136.9436 136.9977 137.1125 136.9487 136.9853 136.7825
##   [9] 136.5435 136.2739 136.2099 135.9149 135.4696 135.4444 135.1616 135.1044
##  [17] 134.5860 134.4845 134.5456 134.4015 134.5185 134.6990 134.6210 134.7614
##  [25] 134.6774 134.8391 134.9853 134.8479 134.9044 134.4133 134.0947 133.9662
##  [33] 133.5905 132.8047 132.6337 132.1901 131.9582 131.5028 130.6315 130.4838
##  [41] 130.0880 129.9267 129.5625 129.0586 129.0531 128.8033 128.7880 128.5660
##  [49] 128.4338 128.5248 128.3543 128.4000 128.2201 128.1027 127.9524 127.9445
##  [57] 127.7025 127.3773 127.3735 127.1009 127.0420 126.3982 126.4125 126.1732
##  [65] 126.1621 125.9414 125.8230 125.9306 125.7859 125.8679 125.7356 125.8278
##  [73] 125.9794 125.8628 125.9557 125.8167 125.7850 125.6608 125.4196 125.0698
##  [81] 125.0502 124.7588 124.6789 124.3745 123.9441 123.9385 123.6825 123.6588
##  [89] 123.4306 123.3305 123.4604 123.3475 123.4708 123.3900 123.9393 123.9220
##  [97] 124.1216 124.0958 124.4281 124.6313 124.5522 124.6690 124.5411 124.4797
## [105] 124.5333 124.3012 124.2653 123.9887 123.5383 123.4908 123.1757 123.0765
## [113] 122.7572 122.3041 122.2954 122.0367 122.0093 121.7750 121.6282 121.7290
## [121] 121.6556 121.5192 121.6069 121.7611 121.6501 121.7525 121.6276 121.6741
## [129] 121.7936 121.6402 121.6944 121.5174 121.4033 121.4736 121.2754 121.2903
## [137] 121.0800 120.9043 120.9656 120.7628 120.7762 120.5664 120.3878 120.4414
## [145] 120.2255 120.2189 119.9814 119.6696 119.6626 119.2994 118.9847 118.4381
## [153] 118.3596 118.0122 117.8786 117.5227 116.9475 116.8963 116.5961 116.5297
## [161] 116.2604 116.1471 116.0049 116.1022 115.9985 116.2548 116.4851 116.4574
## [169] 116.6476 116.6120 116.9024 117.0831 116.9747 117.0541 116.8793 116.6079
## [177] 116.5652 116.2225 116.0616 115.6457 114.6969 113.9465 113.6418 113.1121
## [185] 112.0231 111.8096 111.3539 111.1399 110.7318 110.1593 110.1552 109.9197
## [193] 109.9320 109.7511 109.8182 109.9988 109.9278 110.0813 110.0163 110.2690
## [201] 110.4575 110.3693 110.4826 110.3569 110.3385 110.4193 110.2212 110.2265
## [209] 109.9983 109.7374
length(pred)
## [1] 210
# First 210 observed values from the history stored on the fitted model.
actual <- m$history$y[seq_len(210)]
length(actual)
## [1] 210
print(actual)
##   [1] 135.30 137.77 133.72 133.97 132.16 129.43 128.78 130.89 131.21 131.10
##  [11] 132.08 132.33 131.21 132.08 135.55 131.70 131.70 131.78 133.28 131.27
##  [21] 130.46 133.70 132.94 132.10 132.06 131.79 134.86 136.50 136.77 136.67
##  [31] 135.59 135.65 135.50 137.03 132.97 133.60 131.28 129.09 127.91 132.73
##  [41] 132.92 134.44 133.14 133.40 130.75 129.98 130.74 129.90 131.55 132.05
##  [51] 133.53 132.80 133.18 132.70 128.86 129.95 128.16 126.92 126.98 127.74
##  [61] 126.49 126.29 126.49 124.54 124.40 121.52 122.73 122.29 123.66 124.17
##  [71] 125.93 124.87 123.89 122.93 122.35 122.00 121.42 120.40 120.93 121.53
##  [81] 120.51 119.27 116.80 119.94 122.15 121.80 122.51 121.50 121.83 120.38
##  [91] 119.31 120.38 120.32 120.00 120.22 120.05 120.76 122.94 124.25 125.04
## [101] 123.36 123.13 124.45 123.99 122.34 123.81 123.72 125.45 124.99 125.20
## [111] 123.70 124.99 123.02 123.11 123.50 123.85 124.67 125.88 128.12 127.97
## [121] 127.19 126.23 126.93 125.71 125.67 126.30 125.98 125.84 123.97 122.61
## [131] 123.71 122.91 123.19 122.49 122.29 119.20 120.48 121.79 120.63 119.17
## [141] 118.59 118.90 121.15 122.41 123.22 119.79 119.33 119.40 117.99 113.91
## [151] 114.51 113.65 114.49 115.31 115.31 116.59 115.19 113.01 114.81 113.29
## [161] 115.68 116.95 115.27 115.48 113.30 112.47 113.25 113.14 112.38 113.18
## [171] 112.67 113.73 115.46 115.30 115.06 114.65 115.30 114.89 115.88 116.86
## [181] 118.75 116.13 116.29 117.67 117.36 115.80 114.12 113.68 112.63 108.11
## [191] 108.90 107.03 108.98 106.40 106.86 108.34 106.07 105.75 109.73 111.99
## [201] 111.30 114.44 114.45 113.00 111.64 111.99 111.71 108.99 107.86 107.73
plot(actual, pred)

# Model performance is checked by plotting the predicted values against the
# actual values. The closer the points lie to the diagonal, the better the
# model reproduces the data.
# NOTE(review): `pred` and `actual` are both taken from the first 210 rows,
# which look like data the model was fitted on; if so, this measures
# in-sample fit, not forecast accuracy. Confirm before reporting it as such.

# Percentage of predictions whose absolute error is below 4.5. The count is
# vectorised, and the error vector is no longer stored under the name `diff`,
# which shadowed base::diff.
abs_error <- abs(actual - pred)
correct <- sum(abs_error < 4.5)
correct / length(abs_error) * 100
## [1] 88.57143
# Project - End

# Links: - 

# Project Source Code Link: -
# https://github.com/Sahil0705/Documents/tree/main/19BCE1353_CSE3505_FDA_Project_Source_Code

# Project RShiny and Implementation Demo Video Link: -
# https://drive.google.com/file/d/1amJS4bpuZwp1nOBvdtAd68s2PspU7pYn/view?usp=sharing